## This file combines the GPSS data with annual ZIP-level pollution level data (PM 2.5) from https://www.earthdata.nasa.gov/data/catalog/sedac-ciesin-sedac-aqdh-pm25o3no2-zipcode-1.00#toc-product-summary, 
## annual housing price data from the Census at the county level, 
## and employment and wage data from the Quarterly Census of Employment and Wages (https://www.bls.gov/web/cewqtr.supp.toc.htm) at the county level.

## Load libraries 
  library(tidyverse)
  library(haven)

## Set working directory and file paths  
  setwd("X:\\RE.E1D\\Secure Folders - Economist\\Corelogic-Phan\\SLR")
  pathg <- ('\\Raw Data\\Gallup\\data\\Gallup\\')
  pathc <- ('\\Raw Data\\Gallup\\data\\Census\\')
  pathp <- ('\\Raw Data\\Gallup\\data\\PM25\\')
  pathq <- ('\\Raw Data\\Gallup\\data\\QCEW\\')
  pathi <- ('\\JPE_replication_jan2025\\data\\intermediate\\')

## File 1: with pollution (limited years) ##

# PM 2.5 Data (year 2000 only)
  pm25 <- read_csv(paste0(getwd(), pathp,"PM25_zipcode.csv")) %>% 
    filter(year == 2000) %>%
    select(!year) %>%
    rename(pm25_zip_y2000 = pm25_zip)
  
  gallup <- read_dta(paste0(getwd(), pathg,"GPSS Environment Final GA Aggregate_stata12.dta")) %>%
    mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
    arrange(fips) %>%
    rename(year = yr, area_fips = fips) 
  
  gallup_pm25_y2000 <- left_join(gallup, pm25, by = "zipcode") 

# PM 2.5 Data (all years)
  pm25 <- read_csv(paste0(getwd(), pathp, "PM25_zipcode.csv"))
  gallup_pm25 <- left_join(gallup_pm25_y2000, pm25, by = c("zipcode", "year")) 

# Merge with housing price (county-level, y2000) data
  hp <- read_csv(paste0(getwd(), pathc, "census_90_00_10_2000county_hpct.csv")) %>%
    filter(year == 2000) %>%
    select(fips_county, Hpct_0p5) %>%
    rename(housing_price_y2000 = Hpct_0p5)
  
  gallup_pm25_hp <- left_join(gallup_pm25, hp, by = c("area_fips" = "fips_county")) 

# Merge with QCEW data
  qcew <- read_csv(paste0(getwd(), pathq, "2000_through_2019_q1.csv"))
  gallup_pm25_hp_qcew <- left_join(gallup_pm25_hp, qcew, by = c("area_fips", "year"))
  
  write_csv(gallup_pm25_hp_qcew, (paste0(getwd(), pathi,"gallup_full.csv")))
  write_dta(gallup_pm25_hp_qcew, (paste0(getwd(), pathi,"gallup_full.dta")))

